Setup

knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, cache = TRUE)
knitr::opts_knit$set(root.dir = '/Volumes/JARC_2T/NSF-CREST_Postdoc/LongTerm_Hypoxia_exp/AplCal_hypoxia/miRNA/')
# Load libraries
library(DESeq2)    # BiocManager::install('DESeq2')
library(limma)     # BiocManager::install('limma')
library(EnhancedVolcano) # BiocManager::install('EnhancedVolcano')
library(stringr)
library(readxl)
library(pheatmap)
library(RColorBrewer)
library(variancePartition)  # BiocManager::install('variancePartition')
library(doParallel)
library(cowplot)
library(adegenet)
library(ggpubr)
library(kableExtra)
library(tidyverse)
library(ggfortify) # install.packages("ggfortify")
library(Biobase)
library(arrayQualityMetrics)
library(vegan)
library(ggvenn) # install.packages("ggvenn")
if (!require(devtools)) install.packages("devtools")
devtools::install_github("gaospecial/ggVennDiagram")
library(ggVennDiagram)

## ggplot theme
theme_custom <- function() {
  theme_bw(base_size = 10) %+replace%    #, base_family = "Arial"
    theme(
      panel.grid.major = element_blank(), 
      panel.grid.minor = element_blank(), 
      panel.background = element_blank(),
      panel.border = element_rect(color = "black", fill = NA),
      legend.background = element_rect(fill = NA, colour = NA),
      axis.text.x = element_text(angle=45, hjust=1, vjust = 1)
    )
}
## ggplot labeller
colnames <- c(
  `A` = "abdominal",
  `Pp` = "pleural & pedal"
)

parents <- c(
  `60` = "lab-reared",
  `71` = "wild"
)

global_labeller <- labeller(
  tissue = colnames,
  batch = parents,
  .default = "label_parsed"
)

Load data

miRNA and sncRNA annotations

annot <- read.delim("data/Results.txt", sep = "\t")
miRNA <- annot[which(annot$MIRNA == "Y"), c(2,21)]
knownmiRNA <- na.omit(miRNA)
predictmiRNA <- miRNA[which(is.na(miRNA$KnownRNAs)),]
doubt_miRNA <- annot[,c(2,20:21)]
doubt_miRNA <- na.omit(doubt_miRNA)
doubt_miRNA <- doubt_miRNA[which(doubt_miRNA$MIRNA == "N"),]

annot_known <- annot
annot_known$geneID <- annot$Name
annot_known$geneID <- gsub(knownmiRNA$Name, knownmiRNA$KnownRNAs, annot$Name)
annot_known <- annot_known[, c(2,22)]

Gene count data

count <- read.delim("data/Counts.txt", sep = "\t")
count <- merge(count, annot_known, by = "Name")
rownames(count) <- count$geneID  
countMatrix <- count[,-c(1:3,24,34)] # eliminate the outlier sample Ac204A 
countMatrix <- countMatrix[, order(names(countMatrix))]

miRNA_counts <- count[which(count$MIRNA == "Y"),]
miRNA_countMatrix <- miRNA_counts[,-c(1:3,24,34)]

doubt_miRNA_counts <- count[which(count$Name %in% doubt_miRNA$Name),]
doubt_miRNA_countMatrix <- doubt_miRNA_counts[,-c(1:3,24,34)]

Sample metadata

# Import sample metadata
sdat0 <- read_csv("data/coldata.csv") 
colnames(sdat0) <- c("FileName","sample_ID","animal_ID","tissue","treatment","batch","sample_ID")
sdat0$treatment <- gsub("H_6h", "Hyp_T6h", sdat0$treatment)
sdat0$treatment <- gsub("H_6-days", "LtH_6", sdat0$treatment)
sdat0 <- sdat0[-15,]

# add sample IDs to count matrix
colnames(countMatrix) <- sdat0$sample_ID
colnames(miRNA_countMatrix) <- sdat0$sample_ID
colnames(doubt_miRNA_countMatrix) <- sdat0$sample_ID

# subset coldata to include only abdominal samples

sdat0 <- sdat0 %>%
  filter(tissue=="Abdominal") %>%
  mutate(batch.group = interaction(batch, treatment)) %>%
  mutate_if(is.character, as.factor) %>%
  mutate(batch=as.factor(batch))


sdat <- sdat0 %>%                              # order rows by sample name
  column_to_rownames(var = "sample_ID")               # set sample name to rownames

countMatrix <- countMatrix[, colnames(countMatrix) %in% sdat0$sample_ID]
miRNA_countMatrix <- miRNA_countMatrix[, colnames(miRNA_countMatrix) %in% sdat0$sample_ID]
doubt_miRNA_countMatrix <- doubt_miRNA_countMatrix[, colnames(doubt_miRNA_countMatrix) %in% sdat0$sample_ID]
save(sdat,sdat0,countMatrix,miRNA_countMatrix,doubt_miRNA_countMatrix, file = "output/counts_and_meta.RData")

#sdat$treatment <- factor(sdat$treatment, levels = c("Control","Hyp_T2h","Hyp_T6h","LtH_6","LtH_7","Reox","Recovery"), ordered = T)
#sdat$group <- paste(sdat$treatment, sdat$batch, sdat$tissue, sep = "")

Create DESeqDataSet

# load(file = "output/counts_and_meta.RData")

# Create full DESeqDataSet
dds <- DESeqDataSetFromMatrix(countData = countMatrix,
                              colData = sdat,
                              design = ~ batch+treatment)

# Subset DESeqDataSet
## Subset for batch and ganglia
dds_60 <- dds[, colData(dds)$batch == 60] 

dds_71 <- dds[, colData(dds)$batch == 71]

### Create DESeqDataSet for miRNA only

dds_miRNA <- DESeqDataSetFromMatrix(countData = miRNA_countMatrix,
                              colData = sdat,
                              design = ~ batch+treatment)
dds_miRNA <- dds_miRNA[ rowMeans(counts(dds_miRNA)) > 1, ]
vsd_miRNA <- varianceStabilizingTransformation(dds_miRNA, blind = TRUE)

Filter and transform count data

# Remove genes with less than 1 mean count across samples
dds <- dds[ rowMeans(counts(dds)) > 1, ]
dds_60 <- dds_60[ rowMeans(counts(dds_60)) > 1, ]
dds_71 <- dds_71[ rowMeans(counts(dds_71)) > 1, ]

# Normalize expression data for visualization purposes using VST tranformation
vsd <- vst(dds, blind = TRUE)
vsd_60 <- vst(dds_60, blind = TRUE)
vsd_71 <- vst(dds_71, blind = TRUE)

Visualize transcriptome

Principal Coordinates Analysis

## Calculate distances among samples
sampleDists <- dist(t(assay(vsd)), method = "manhattan")
sampleDistMatrix <- as.matrix(sampleDists)

## Calculate MDS
mds <- as.data.frame(colData(vsd)) %>%
  
  cbind(cmdscale(sampleDistMatrix)) %>%
  mutate(fillcol = factor(ifelse(batch == 60, 1, 0)))

# Calculate group centroids for plotting
mds <- mds %>%
  group_by(batch, treatment) %>%
  dplyr::summarise(c1 = mean(`1`), c2 = mean(`2`)) %>%    
  full_join(mds)

# Calculate variance explained by each PC
MDS <- cmdscale(sampleDistMatrix, eig = TRUE)
vexpl <- round(MDS$eig*100/sum(MDS$eig),1)[1:2]

# Plot with spiders
pcoa <- ggplot(mds, aes(color = treatment, shape = batch)) +
  geom_segment(mapping = aes(x = `1`, y = `2`, xend = c1, yend = c2),
               lwd = 0.25, col = "grey") +
  geom_point(size = 2, aes(x = c1, y = c2, fill = treatment)) +
  geom_point(size = 0.7, aes(x = `1`, y = `2`, fill = treatment), show.legend = F) +
  scale_alpha_manual(values = c(1, 0), name = "batch", labels = c("lab_reared parents", "wild parents")) +
  guides(shape = guide_legend(order = 2, override.aes = list(fill = "black"))) +
  guides(alpha = guide_legend(override.aes = list(shape = 21, alpha = 1, fill = c("black", "white")))) +
  labs(x = paste0("PC1 [", vexpl[1],"%]"), y = paste0("PC2 [", vexpl[2],"%]")) +
  theme_custom() +
  theme(legend.spacing.y = unit(0, "cm"))

pcoa

#ggsave(filename = "figures/Fig1.pcoa_sncRNA_GE.png",pcoa, width = 5, height = 5)

The PCoA shows that sncRNA expression varies significantly by batch along PC1, and both batched show a separation by treatment mostly across PC2

Only on miRNA positive transcripts

## Calculate distances among samples
sampleDists <- dist(t(assay(vsd_miRNA)), method = "manhattan")
sampleDistMatrix <- as.matrix(sampleDists)

## Calculate MDS
mds <- as.data.frame(colData(vsd_miRNA)) %>%
  
  cbind(cmdscale(sampleDistMatrix)) %>%
  mutate(fillcol = factor(ifelse(batch == 60, 1, 0)))

# Calculate group centroids for plotting
mds <- mds %>%
  group_by(batch, treatment) %>%
  dplyr::summarise(c1 = mean(`1`), c2 = mean(`2`)) %>%    
  full_join(mds)

# Calculate variance explained by each PC
MDS <- cmdscale(sampleDistMatrix, eig = TRUE)
vexpl <- round(MDS$eig*100/sum(MDS$eig),1)[1:2]

# Plot with spiders
pcoa <- ggplot(mds, aes(color = treatment, shape = batch)) +
  geom_segment(mapping = aes(x = `1`, y = `2`, xend = c1, yend = c2),
               lwd = 0.25, col = "grey") +
  geom_point(size = 2, aes(x = c1, y = c2, fill = treatment)) +
  geom_point(size = 0.7, aes(x = `1`, y = `2`, fill = treatment), show.legend = F) +
  scale_alpha_manual(values = c(1, 0), name = "batch", labels = c("lab_reared parents", "wild parents")) +
  guides(shape = guide_legend(order = 2, override.aes = list(fill = "black"))) +
  guides(alpha = guide_legend(override.aes = list(shape = 21, alpha = 1, fill = c("black", "white")))) +
  labs(x = paste0("PC1 [", vexpl[1],"%]"), y = paste0("PC2 [", vexpl[2],"%]")) +
  theme_custom() +
  theme(legend.spacing.y = unit(0, "cm"))

pcoa

#ggsave(filename = "figures/Fig.pcoa_miRNA_only_GE.png",pcoa, width = 5, height = 5)

Differential expression analysis

Run DESeq

# Run DESeq pipeline
design(dds) <- formula(~ batch.group)
dsr1 <- DESeq(dds)

# Define group contrasts
group.contrasts <- tibble(num = c("Hyp_T6h","LtH_6","Recovery","Recovery","LtH_6","Recovery"),
                          den = c("Control","Control","Control","Hyp_T6h","Hyp_T6h","LtH_6"))

# Get DESeq results for all group contrasts for each tissue
DE <- crossing(batch = c("60", "71"), group.contrasts) %>%
  mutate(dsr = pmap(list(batch, num, den), function(x, y, z) {
    results(dsr1, contrast = c("batch.group", paste0(x, ".", c(y, z))))}))

# Run DESeq pipeline for both batches together 
design(dds) <- formula(~ batch + treatment)
dsr2 <- DESeq(dds)

# Get DESeq results for all contrasts and join with results from each colony, from above

DE <- crossing(batch = "all", group.contrasts) %>%
  mutate(dsr = map2(num, den, ~ results(dsr2, contrast = c("treatment", .x, .y)))) %>%
  bind_rows(DE)

## Plot results

DE_60 <- DE[which(DE$batch=="60"),]
names= paste(DE_60$num," vs ",DE_60$den)

volcano_List = list()
for (i in c(1:6)){
  name = names[i]
  rdf=data.frame(DE_60$dsr[[i]])
  plt=EnhancedVolcano(rdf,
    lab = rownames(rdf),
    x = "log2FoldChange",
    y = "padj",
    legendPosition = 'none',
    title = name,
    subtitle = NULL,
    caption = NULL)
  volcano_List[[i]] = plt
}

volc_plot1 <- plot_grid(plotlist=volcano_List, nrow=2)

volc_plot1

DE_71 <- DE[which(DE$batch=="71"),]
names= paste(DE_71$num," vs ",DE_71$den)

volcano_List = list()
for (i in c(1:6)){
  name = names[i]
  rdf=data.frame(DE_71$dsr[[i]])
  plt=EnhancedVolcano(rdf,
    lab = rownames(rdf),
    x = "log2FoldChange",
    y = "padj",
    legendPosition = 'none',
    title = name,
    subtitle = NULL,
    caption = NULL)
  volcano_List[[i]] = plt
}

volc_plot2 <- plot_grid(plotlist=volcano_List, nrow=2)

volc_plot2

#ggsave(filename = "figures/Fig.volcano_naive_GE.png",volc_plot1, width = 15, height = 10)
#ggsave(filename = "figures/Fig.volcano_pre-exp_GE.png",volc_plot2, width = 15, height = 10)
# Run DESeq pipeline
design(dds_miRNA) <- formula(~ batch.group)
dsr1 <- DESeq(dds_miRNA)

# Define group contrasts
group.contrasts <- tibble(num = c("Hyp_T6h","LtH_6","Recovery","Recovery","LtH_6","Recovery"),
                          den = c("Control","Control","Control","Hyp_T6h","Hyp_T6h","LtH_6"))

# Get DESeq results for all group contrasts for each tissue
DE_miRNA <- crossing(batch = c("60", "71"), group.contrasts) %>%
  mutate(dsr = pmap(list(batch, num, den), function(x, y, z) {
    results(dsr1, contrast = c("batch.group", paste0(x, ".", c(y, z))))}))

# Run DESeq pipeline for both batches together 
design(dds_miRNA) <- formula(~ batch + treatment)
dsr2 <- DESeq(dds_miRNA)

# Get DESeq results for all contrasts and join with results from each colony, from above

DE_miRNA <- crossing(batch = "all", group.contrasts) %>%
  mutate(dsr = map2(num, den, ~ results(dsr2, contrast = c("treatment", .x, .y)))) %>%
  bind_rows(DE_miRNA)

## Plot results

DE_miRNA60 <- DE_miRNA[which(DE_miRNA$batch=="60"),]
names= paste(DE_miRNA60$num," vs ",DE_miRNA60$den)

volcano_List = list()
for (i in c(1:6)){
  name = names[i]
  rdf=data.frame(DE_miRNA60$dsr[[i]])
  plt=EnhancedVolcano(rdf,
    lab = rownames(rdf),
    x = "log2FoldChange",
    y = "padj",
    pCutoff = 0.1,
    legendPosition = 'none',
    title = name,
    subtitle = NULL,
    caption = NULL)
  volcano_List[[i]] = plt
}

volc_plot1 <- plot_grid(plotlist=volcano_List, nrow=2)

volc_plot1

DE_miRNA71 <- DE_miRNA[which(DE_miRNA$batch=="71"),]
names= paste(DE_miRNA71$num," vs ",DE_miRNA71$den)

volcano_List = list()
for (i in c(1:6)){
  name = names[i]
  rdf=data.frame(DE_miRNA71$dsr[[i]])
  plt=EnhancedVolcano(rdf,
    lab = rownames(rdf),
    x = "log2FoldChange",
    y = "padj",
    pCutoff = 0.1,
    legendPosition = 'none',
    title = name,
    subtitle = NULL,
    caption = NULL)
  volcano_List[[i]] = plt
}

volc_plot2 <- plot_grid(plotlist=volcano_List, nrow=2)

volc_plot2

#ggsave(filename = "figures/Fig.volcano_miRNA_naive_GE.png",volc_plot1, width = 15, height = 10)
#ggsave(filename = "figures/Fig.volcano_miRNA_pre-exp_GE.png",volc_plot2, width = 15, height = 10)

Get significant DEGs

DE <- DE %>%
  mutate(sig = map(dsr, ~ rownames_to_column(data.frame(.[which(.$padj < 0.05), ]), "gene")),
          up = map(sig, ~ filter(., log2FoldChange > 0)),
        down = map(sig, ~ filter(., log2FoldChange < 0))) 

# Generate logP values for all differential expression contrasts
DE <- DE %>% mutate(logP = map(dsr, ~ data.frame(
  gene = rownames(data.frame(.)),
  logP = -log10(data.frame(.)$pvalue) * sign(data.frame(.)$log2FoldChange))))

# Count number of differentially expressed genes within each tissue, and overall, for each contrast
DEtab <- DE %>%
  filter((num == "Hyp_T6h" & den == "Control")|
         (num == "LtH_6" & den == "Control")|
         (num == "Recovery" & den == "Control")|
         (num == "Recovery" & den == "Hyp_T6h")|
         (num == "LtH_6" & den == "Hyp_T6h")|
         (num == "Recovery" & den == "LtH_6")) %>%
  mutate(nsig = map_dbl( sig, ~ nrow(.)),
          nup = map_dbl(  up, ~ nrow(.)),
          ndn = map_dbl(down, ~ nrow(.)),
          `DEGs [up, down]` = paste0(nsig, " [", nup, ", ", ndn, "]")) %>%
  mutate(group = case_when(batch == "60" ~ "Naive", batch == "71" ~ "Pre-exposed", 
                           batch == "all" ~ "all groups")) %>%
  mutate(group = factor(group, levels = c("Naive", "Pre-exposed", "all groups"))) %>%
  select(group, num, den, `DEGs [up, down]`) %>%
  spread(group, `DEGs [up, down]`)

DEtab$contrast <- paste(DEtab$num, DEtab$den, sep = " vs ")
dek <- DEtab %>% select(c(6,3:5)) %>%
  knitr::kable(format = "markdown", caption = "Number of differentially expressed genes within individual genets and across all genets for each specified contrast. DEGs identified using DESeq with an adjusted p-value < 0.1. In brackets are the numbers of significantly up- and down-regulated genes.", row.names = )  %>%
  kable_styling(full_width = TRUE)

dek
Number of differentially expressed genes within individual genets and across all genets for each specified contrast. DEGs identified using DESeq with an adjusted p-value < 0.1. In brackets are the numbers of significantly up- and down-regulated genes.
contrast Naive Pre-exposed all groups
Hyp_T6h vs Control 211 [81, 130] 152 [67, 85] 270 [112, 158]
LtH_6 vs Control 171 [70, 101] 256 [174, 82] 236 [125, 111]
LtH_6 vs Hyp_T6h 28 [17, 11] 137 [103, 34] 13 [4, 9]
Recovery vs Control 217 [82, 135] 247 [145, 102] 405 [193, 212]
Recovery vs Hyp_T6h 35 [13, 22] 127 [49, 78] 8 [3, 5]
Recovery vs LtH_6 48 [14, 34] 20 [10, 10] 11 [4, 7]
DEGs_60 <- DE %>%
  unnest(sig) %>%
  filter(batch == "60") %>%
  mutate(contrast = paste(num, den, sep = ".")) %>%
  select(.,contrast, gene)

DEGs_60_wide <- list(Hyp_T6h=pull(DEGs_60[which(DEGs_60$contrast=="Hyp_T6h.Control"),2]),
                 Hyp_T6d=pull(DEGs_60[which(DEGs_60$contrast=="LtH_6.Control"),2]))
str(DEGs_60_wide)
## List of 2
##  $ Hyp_T6h: chr [1:211] "Cluster_26" "Cluster_28" "Cluster_58" "Cluster_61" ...
##  $ Hyp_T6d: chr [1:171] "Cluster_26" "Cluster_57" "Cluster_58" "Cluster_94" ...
gv1 <- ggvenn(DEGs_60_wide, fill_color = c("#0073C2FF", "#EFC000FF"),
  stroke_size = 0.5, set_name_size = 4, show_percentage = F, stroke_color = F 
  )
gv1

#ggsave("figures/Naive_DEGs_vennDiagram.png", gv1, height = 3, width = 5)

DEGs_71 <- DE %>%
  unnest(sig) %>%
  filter(batch == "71") %>%
  mutate(contrast = paste(num, den, sep = ".")) %>%
  select(.,contrast, gene)
DEGs_71_wide <- list(Hyp_T6h=pull(DEGs_71[which(DEGs_71$contrast=="Hyp_T6h.Control"),2]),
                 Hyp_T6d=pull(DEGs_71[which(DEGs_71$contrast=="LtH_6.Control"),2]))
str(DEGs_71_wide)
## List of 2
##  $ Hyp_T6h: chr [1:152] "Cluster_26" "Cluster_29" "Cluster_33" "Cluster_58" ...
##  $ Hyp_T6d: chr [1:256] "Cluster_25" "Cluster_26" "Cluster_33" "Cluster_34" ...
gv2 <- ggvenn(DEGs_71_wide, fill_color = c("#0073C2FF", "#EFC000FF"),
  stroke_size = 0.5, set_name_size = 4, show_percentage = F, stroke_color = F 
  )
gv2

#ggsave("figures/Pre_exposed_DEGs_vennDiagram.png", gv2, height = 3, width = 5)

DEGs <- DE %>%
  unnest(sig) %>%
  mutate(contrast = paste(num, den, sep = ".")) %>%
  filter(contrast %in% c("Hyp_T6h.Control", "LtH_6.Control")) %>%
  select(.,contrast, gene, batch)
DEGs <- list(wild=pull(DEGs[which(DEGs$batch=="71"),2]),
                 lab_reared=pull(DEGs[which(DEGs$batch=="60"),2]))
str(DEGs)
## List of 2
##  $ wild      : chr [1:408] "Cluster_26" "Cluster_29" "Cluster_33" "Cluster_58" ...
##  $ lab_reared: chr [1:382] "Cluster_26" "Cluster_28" "Cluster_58" "Cluster_61" ...
gv3 <- ggvenn(DEGs, fill_color = c("#0073C2FF", "#EFC000FF"),
  stroke_size = 0.5, set_name_size = 4, show_percentage = F, stroke_color = F 
  )
gv3

#ggsave("figures/all_DEGs_vennDiagram.png", gv3, height = 3, width = 5)

DEG Similarities

Similarity of differentially expressed sncRNAs across batches and contrasts

DEGs in common across contrasts and batches
# Find genes that are DE in same direction in both batches when analyzed individually
DE %>%
  unnest(sig) %>%
  filter(batch != "all") %>%
  group_by(num, den) %>%
  dplyr::count(gene, log2FoldChange > 0) %>%   # is same gene DE in same direction?
  summarise(`shared` = sum(n==2)) %>%
  knitr::kable(format="markdown", caption = "Number of differentially expressed small RNAs in common between Naive and Pre-exposed animals")  %>%
  kable_styling(full_width = TRUE)
Number of differentially expressed small RNAs in common between Naive and Pre-exposed animals
num den shared
Hyp_T6h Control 83
LtH_6 Control 82
LtH_6 Hyp_T6h 2
Recovery Control 84
Recovery Hyp_T6h 1
Recovery LtH_6 0
DEGs_60 <- DE %>%
  unnest(sig) %>%
  filter(batch == "60") %>%
  mutate(contrast = paste(num, den, sep = ".")) %>%
  select(.,contrast, gene, log2FoldChange)
#write_tsv(DEGs_60, path = "output/DEGs_by_contrast_Naive.tsv")

DEGs_71 <- DE %>%
  unnest(sig) %>%
  filter(batch == "71") %>%
  mutate(contrast = paste(num, den, sep = ".")) %>%
  select(.,contrast, gene, log2FoldChange)
#write_tsv(DEGs_71, path = "output/DEGs_by_contrast_batch 71_pre-exposed.tsv")

# Shared 
DEGs_shared <- DE %>%
  unnest(sig) %>%
  filter(batch != "all") %>%
  mutate(contrast=paste(num, den, sep = ".")) %>%
  select(.,contrast, batch, gene, log2FoldChange) %>%
  group_by(.,contrast) %>%
  dplyr::count(gene, log2FoldChange > 0) %>% 
  filter(n != 1) 

colnames(DEGs_shared) <- c("contrast", "gene", "reg", "n")
DEGs_shared$reg <- gsub(TRUE, "Up", DEGs_shared$reg) 
DEGs_shared$reg <- gsub(FALSE, "Down", DEGs_shared$reg) 
DEGs_shared <- DEGs_shared[,-4]

#write_tsv(DEGs_shared, path = "output/DEGs_shared_batches.tsv")

DEGs in common beteween peak of exposure vs Ctrl contrasts (T6h vs Ctrol, LtH_6 vs Ctrol)

in batch 60 abdominal

genesincommon_60 <- DE %>%
  unite("contrast", num, den, sep = ".") %>%
  filter(batch == "60", contrast %in% c("Hyp_T6h.Control", "LtH_6.Control")) %>%
  unnest(sig) %>%
  select(contrast, gene, log2FoldChange) %>%
  pivot_wider(names_from = contrast, values_from = log2FoldChange) %>%
  drop_na() %>%
  mutate(gene = factor(gene, levels = gene))


genesincommon_60 <- genesincommon_60[c(1:100),]  
# Get normalized counts for the genes in common
gcounts <- data.frame(assay(vsd_60[which(rownames(assay(vsd_60)) %in% genesincommon_60$gene), ]))
gcounts <- rownames_to_column(gcounts, var = "gene") %>%
  pivot_longer(-gene, names_to = "sample_ID", values_to = "count") %>%
  left_join(select(sdat0, sample_ID, treatment)) %>%
  mutate(gene = factor(gene, levels = levels(genesincommon_60$gene))) 


# Get group log2foldchanges for plotting
gl2fc <- genesincommon_60 %>%
  pivot_longer(-gene, names_to = "treatment", values_to = "l2fc") %>%
  mutate(treatment = gsub(".Control", "", treatment))
  
# Get group means for plotting
gcountsmean <- gcounts %>%
  group_by(gene, treatment) %>%
  summarise(count = mean(count))

# Plot
plotgcounts <- gcounts %>%
  ggplot(aes(x = treatment, y = pmax(0,log(count)))) +
  geom_jitter(width = 0.2, aes(color = treatment)) +
  geom_line(data = gcountsmean, aes(group = gene)) +
  geom_text(data = gl2fc, aes(y = 2.4, label = round(l2fc, 2))) +
  facet_wrap(~ gene) +
  theme_custom() +
  theme(legend.position = "none") +
  labs(x = "", y = "log(Variance stabilized counts)") +
  scale_y_continuous(limits = c(0, 2.6))
plotgcounts

#ggsave(file = "figures/Fig_shared_sncRBA_hyp_60.png", width = 360, height = 360, units = "mm")

in batch 71

genesincommon_71 <- DE %>%
  unite("contrast", num, den, sep = ".") %>%
  filter(batch == "71", contrast %in% c("Hyp_T6h.Control", "LtH_6.Control")) %>%
  unnest(sig) %>%
  select(contrast, gene, log2FoldChange) %>%
  pivot_wider(names_from = contrast, values_from = log2FoldChange) %>%
  drop_na() %>%
  mutate(gene = factor(gene, levels = gene))

# Get normalized counts for the genes in common
gcounts <- data.frame(assay(vsd_71[which(rownames(assay(vsd_71)) %in% genesincommon_71$gene), ]))
gcounts <- rownames_to_column(gcounts, var = "gene") %>%
  pivot_longer(-gene, names_to = "sample_ID", values_to = "count") %>%
  left_join(select(sdat0, sample_ID, treatment)) %>%
  mutate(gene = factor(gene, levels = levels(genesincommon_71$gene))) 


# Get group log2foldchanges for plotting
gl2fc <- genesincommon_71 %>%
  pivot_longer(-gene, names_to = "treatment", values_to = "l2fc") %>%
  mutate(treatment = gsub(".Control", "", treatment))
  
# Get group means for plotting
gcountsmean <- gcounts %>%
  group_by(gene, treatment) %>%
  summarise(count = mean(count))

# Plot
plotgcounts <- gcounts %>%
  ggplot(aes(x = treatment, y = pmax(0,log(count)))) +
  geom_jitter(width = 0.2, aes(color = treatment)) +
  geom_line(data = gcountsmean, aes(group = gene)) +
  geom_text(data = gl2fc, aes(y = 2.4, label = round(l2fc, 2))) +
  facet_wrap(~ gene) +
  theme_custom() +
  theme(legend.position = "none") +
  labs(x = "", y = "log(Variance stabilized counts)") +
  scale_y_continuous(limits = c(0, 2.6))
plotgcounts

#ggsave(file = "figures/Fig_shared_sncRNA_hyp_71.png", width = 360, height = 360, units = "mm")